package com.offcn.bigdata.spark.p1

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Word-count driver intended for remote (cluster) submission: reads text
  * from the given input path, splits each line on whitespace, counts word
  * occurrences with `reduceByKey`, and prints each (word, count) pair.
  *
  * Expects exactly one program argument: the input path; exits with -1
  * and prints usage otherwise.
  */
object RemoteScalaSparkWordCountApp {
    def main(args: Array[String]): Unit = {
        if(args == null || args.length != 1) {
            println(
                """
                  |Usage: <inputpath>
                """.stripMargin)
            System.exit(-1)
        }
        val Array(inputpath) = args

        val conf = new SparkConf()
//                .setMaster("local[*]")
                // getSimpleName on a Scala object's underlying class carries a
                // trailing '$' (module class, e.g. "RemoteScalaSparkWordCountApp$");
                // strip it so the Spark UI shows the plain object name.
                .setAppName(getClass.getSimpleName.stripSuffix("$"))

        val sc = new SparkContext(conf)

        val lines: RDD[String] = sc.textFile(inputpath)

        println("partitions: " + lines.getNumPartitions)

        // Split on runs of whitespace; drop the empty token that split()
        // produces when a line begins with whitespace, so "" is never counted
        // as a word.
        val words: RDD[String] = lines.flatMap(line => line.split("\\s+")).filter(_.nonEmpty)

        val pairs: RDD[(String, Int)] = words.map(word => (word, 1))

        val ret: RDD[(String, Int)] = pairs.reduceByKey(_ + _)

        // NOTE: RDD.foreach runs on the executors — in cluster mode this
        // output appears in executor stdout, not the driver console.
        ret.foreach(t => println(t))

        sc.stop()
    }
}
